# Package imports
import numpy as np
import pandas as pd
## Read the CSV file into a DataFrame
cbb = pd.read_csv('cbb.csv')
cbb.head(5)
#### Let's try to understand some of our data more closely
print(f' The shape of the dataframe is {cbb.shape}')
# Per-conference averages. numeric_only=True restricts the mean to numeric
# columns; without it pandas >= 2.0 raises TypeError as soon as it meets a
# text column, while older versions silently dropped such columns.
cbb.groupby('CONF').mean(numeric_only=True)
#### Let's see if we can use pandas-profiling to derive initial correlations for seed placements
# NOTE(review): presumably this import is what makes `profile_report` available
# on DataFrames (the module is not referenced by name anywhere else) - confirm.
import pandas_profiling
# Build a profiling report over the whole DataFrame; the `style` dict asks for
# a full-width layout.
# NOTE(review): the accepted `style` options depend on the installed
# pandas-profiling version - verify against the version in use.
cbb.profile_report(style={'full_width':True})
### Let's explore our dataset to find the top teams by adjusted defensive
### efficiency (ADJDE), adjusted offensive efficiency (ADJOE), and overall wins
# Lowercase all the column names. A concrete list is assigned (rather than a
# lazy `map` object) so the new labels are plain, inspectable strings.
cbb.columns = [column.lower() for column in cbb.columns]
# Total wins per team and per conference, ten largest totals first.
cbb.groupby('team')['w'].sum().sort_values(ascending=False).head(10)
cbb.groupby('conf')['w'].sum().sort_values(ascending=False).head(10)
# Ten rows with the highest adjusted offensive efficiency.
cbb[['team', 'conf', 'adjoe']].sort_values(by=['adjoe'], ascending=False).head(10)
# Ten rows with the best adjusted defensive efficiency. Per the dataset's
# column descriptions ADJDE measures points allowed, so lower is better and
# the best defences come first when sorting ascending.
# NOTE(review): confirm against the data dictionary shipped with cbb.csv.
cbb[['team', 'conf', 'adjde']].sort_values(by=['adjde'], ascending=True).head(10)
cbb.columns
#### Interesting Enough we always see that the ACC and Big10 Triamph over other conferences
#### Lets see if we can visualize this relationship
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot
from plotly.graph_objs import *
init_notebook_mode()
import matplotlib as plt
import seaborn as sns
import plotly.express as px
%matplotlib inline
# Scatter of wins for every team row, coloured by the `adjde` column.
fig = px.scatter(
    cbb,
    x='team',
    y='w',
    color='adjde',
)
fig.show()
# 3-D scatter of adjde / adjoe / barthag, coloured by wins.
px.scatter_3d(
    cbb,
    x='adjde',
    y='adjoe',
    z='barthag',
    color='w',
    title='3d plot of Adjusted Defensive, Adjusted Offensive, and Power Ranking colored by wins',
)
# This is interesting: one thing we can see is that offensive efficiency
# seemingly matters more than defensive efficiency.
# Keep only the rows whose postseason result is 'Champions'. The mask is a
# 1-D boolean Series aligned on the index, rather than an (n, 1) array taken
# from a one-column DataFrame.
# NOTE(review): the name `first_seed` is kept so later references still
# resolve, but the filter selects champions, not first seeds.
first_seed = cbb[cbb['postseason'] == 'Champions']
# Average profile of the selected rows. numeric_only=True skips text columns,
# which would otherwise raise TypeError on pandas >= 2.0.
first_seed.mean(numeric_only=True)